phaletes <- read_excel("../data/New Home Study- SVOCs_University of Toronto_March2024.xlsx", sheet = "Passive Air - PDMS (pg.m-3)",range = "A5:N145")
## New names:
## • `` -> `...6`
colnames(phaletes)[which(names(phaletes) == "House ID")] <- "House_ID" #change column names
colnames(phaletes)[which(names(phaletes) == "Sample ID")] <- "Sample_ID" #change column names
colnames(phaletes)[which(names(phaletes) == "Period (month)")] <- "Period"
phaletes_detected <- phaletes[phaletes$Period %in% c(0, 3, 6, 9, 12), ]
phaletes_filled <- phaletes_detected %>%
mutate(
DEP = ifelse(DEP == "<DL", 50, as.numeric(DEP)),
DPP = ifelse(DPP == "<DL", 101, as.numeric(DPP)),
DiBP = ifelse(DiBP == "<DL", 80, as.numeric(DiBP)),
DnBP = ifelse(DnBP == "<DL", 103, as.numeric(DPP)),
BzBP = ifelse(BzBP == "<DL", 87, as.numeric(BzBP)),
DEHP = ifelse(DEHP == "<DL", 75, as.numeric(DEHP)),
DnOP = ifelse(DnOP == "<DL", 69, as.numeric(DnOP)),
DiNP = ifelse(DiNP == "<DL", 102, as.numeric(DiNP))
) %>%
mutate(Period = as.numeric(as.character(Period))) %>%
arrange(House_ID)
## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `DEP = ifelse(DEP == "<DL", 50, as.numeric(DEP))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.
complete_houses <- phaletes_filled %>%
group_by(House_ID) %>%
filter(n_distinct(Period) == 5) %>%
ungroup()
chemicals <- complete_houses[, c("House_ID","Period","DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP","DiNP")]
aggregated_df <- chemicals %>%
group_by(Period) %>%
summarise(
DEP = mean(DEP),
DPP = mean(DPP),
DiBP = mean(DiBP),
DnBP = mean(DnBP),
BzBP = mean(BzBP),
DEHP = mean(DEHP),
DnOP = mean(DEHP),
DiNP = mean(DiNP))
# Convert the data frame to a time series object for each chemical
# the period column represents months with records at 0, 3, 6, 9, 12
# Set the start to the first period (month 0) of the first year and frequency to 5
chemicals_ts <- lapply(aggregated_df[, -1], function(x) ts(x, start = c(2020, 1), frequency = 5)) # 5 periods per year
# Normalize the data
chemicals_ts <- lapply(chemicals_ts, scale)
# Function to plot cross-correlation for a given pair of chemicals
plot_ccf <- function(ts1, ts2, name1, name2) {
ts1 <- as.numeric(ts1)
ts2 <- as.numeric(ts2)
ccf_result <- ccf(ts1, ts2,lag = 5,plot = FALSE)
print(ccf_result)
plot(ccf_result, main = paste("Cross-Correlation between", name1, "and", name2))
}
# Loop through each pair of chemicals and plot cross-correlation
chemical_names <- colnames(aggregated_df)[-1]
for(i in 1:(length(chemicals_ts) - 1)) {
for(j in (i + 1):length(chemicals_ts)) {
plot_ccf(chemicals_ts[[i]], chemicals_ts[[j]], chemical_names[i], chemical_names[j])
}
}
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.109 -0.175 -0.399 -0.284 0.803 0.592 -0.201 -0.425 -0.021
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.066 -0.225 -0.359 0.065 0.715 0.688 -0.201 -0.587 -0.030
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.074 -0.245 -0.376 -0.155 0.905 0.473 -0.231 -0.424 -0.021
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.230 -0.046 -0.356 -0.633 0.613 0.544 0.031 -0.364 -0.019
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.327 0.190 -0.189 -0.959 0.070 0.385 0.363 -0.177 -0.010
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.327 0.190 -0.189 -0.959 0.070 0.385 0.363 -0.177 -0.010
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.168 -0.331 -0.262 0.337 0.777 0.453 -0.148 -0.626 -0.032
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.078 -0.246 -0.293 0.287 0.868 0.271 -0.634 -0.354 0.178
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.087 -0.309 -0.444 0.288 0.980 0.001 -0.504 -0.226 0.127
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.270 -0.118 -0.630 -0.213 0.915 0.253 -0.319 -0.272 0.113
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.384 0.131 -0.596 -0.710 0.494 0.437 0.050 -0.251 0.060
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.384 0.131 -0.596 -0.710 0.494 0.437 0.050 -0.251 0.060
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.197 -0.341 -0.050 0.512 0.704 0.118 -0.531 -0.406 0.192
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.121 -0.447 -0.521 0.418 0.854 0.228 -0.326 -0.249 -0.077
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.376 -0.220 -0.746 -0.067 0.663 0.394 -0.152 -0.178 -0.068
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.534 0.103 -0.705 -0.607 0.182 0.454 0.106 -0.031 -0.037
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.534 0.103 -0.705 -0.607 0.182 0.454 0.106 -0.031 -0.037
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.274 -0.434 -0.077 0.484 0.920 0.261 -0.430 -0.335 -0.116
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.268 -0.101 -0.564 -0.363 0.859 0.357 -0.219 -0.316 0.077
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.381 0.153 -0.485 -0.825 0.381 0.438 0.155 -0.241 0.041
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.381 0.153 -0.485 -0.825 0.381 0.438 0.155 -0.241 0.041
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.195 -0.351 -0.121 0.478 0.756 0.230 -0.428 -0.498 0.130
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.338 -0.077 -0.508 -0.531 0.798 0.284 -0.082 -0.351 0.127
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.338 -0.077 -0.508 -0.531 0.798 0.284 -0.082 -0.351 0.127
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.173 -0.202 0.084 0.522 0.449 -0.230 -0.562 -0.291 0.404
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## 0.181 -0.358 -0.302 -0.021 1.000 -0.021 -0.302 -0.358 0.181
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.093 0.054 0.287 0.382 -0.066 -0.697 -0.476 0.034 0.574
##
## Autocorrelations of series 'X', by lag
##
## -4 -3 -2 -1 0 1 2 3 4
## -0.093 0.054 0.287 0.382 -0.066 -0.697 -0.476 0.034 0.574
### Positive Lags (1, 2, 3): second series is lagged behind the first
series ### Negative Lags (-1, -2, -3): first series is lagged behind the
second series
# Loop through each pair of chemicals and plot cross-correlation
chemical_columns <- colnames(phaletes_filled)[7:14] # Adjust this based on the actual column indices
phaletes_normalized <- phaletes_filled
phaletes_normalized[chemical_columns] <- lapply(phaletes_filled[chemical_columns], scale)
# Loop through each pair of chemicals and plot cross-correlation
for (i in 1:(length(chemical_columns) - 1)) {
for (j in (i + 1):length(chemical_columns)) {
p <- ggplot(phaletes_normalized, aes_string(x = chemical_columns[i], y = chemical_columns[j])) +
geom_point(color = "#69b3a2") +
geom_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
theme_ipsum() +
ggtitle(paste("Scatter Plot with Linear Regression between", chemical_columns[i], "and", chemical_columns[j]))
print(p)
}
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
### 1. DPP and DnBP are significantly correlated 2. (DnOP and DiNP),
(DPP, DiNP)are weakly correlated 3.